splicing <- c("SE","MXE","RI","A3SS","A5SS")
all_psi <- c()
splice_type <- c()
human <- read.delim("~/Desktop/MISO_proj/EuropeanDataset/data/Sample_Metadata_Human_Heart-Brain-Only.tsv",sep="\t",header = T,stringsAsFactors = F)
human <- human[order(human$Source.Name),]
# Color bars for heatmaps
heart.samps <- human$Source.Name[which(human$Characteristics.organism.part.=="heart")]
brain.samps <- human$Source.Name[which(human$Characteristics.organism.part.!="heart")]
half <- floor(nrow(human)/2)
# # Combine the matrices into one matrix
# for (splice in splicing){
# x <- read.csv(paste("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/EuropeanDataset/human/cohort_level/miso_counts_",splice,"_mat.csv",sep=""),header=T,row.names = 1,check.names = F)
# colnames(x) <- sub(".counts.txt","",colnames(x))
# x <- x[,order(colnames(x))]
# y <- apply(x,1,function(row){table(is.na(row))["FALSE"] >= half})
# x <- x[y==T,]
# splice_type <- c(splice_type,rep(splice,nrow(x)))
# all_psi <- rbind(all_psi,data.frame(x,SpliceType=splice))
# }
#
# save(all_psi,file="~/Desktop/MISO_proj/EuropeanDataset/data/AllPSI_human.Rdata")
load("~/Desktop/MISO_proj/EuropeanDataset/data/AllPSI_human.Rdata")
heart <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="heart")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")])
heart$Group[which(heart$DevelopStage == "19 week post conception")] <- "Fetus"
heart$Group[which(heart$DevelopStage == "13 week post conception" | heart$DevelopStage == "16 week post conception" | heart$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
heart$Group[which(heart$DevelopStage == "11 week post conception" | heart$DevelopStage == "12 week post conception")] <- "Late_Embryo"
heart$Group[which(heart$DevelopStage == "9 week post conception" | heart$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
heart$Group[which(heart$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
heart$Group[which(heart$DevelopStage == "7 week post conception")] <- "Early_Embryo"
heart$Group[which(heart$DevelopStage == "4 week post conception" | heart$DevelopStage == "5 week post conception" | heart$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"
# table(heart$DevelopStage)
table(heart$Group)
##
## adolescent Developing_Embryo Early_Embryo Fetus
## 1 5 3 3
## infant Late_Embryo middle adult Midstage_Embryo
## 4 6 1 7
## neonate toddler VeryEarly_Embryo VeryLate_Embryo
## 3 2 6 8
## young adult
## 1
colnames(all_psi) <- sub("^X","",colnames(all_psi))
heart.psi <- all_psi[,heart.samps]
heart.psi.complete <- heart.psi[complete.cases(heart.psi),]
heart.psi.complete <- heart.psi.complete[,order(colnames(heart.psi.complete))]
heart.psi.100 <- heart.psi.complete * 100 # turn psi values into integers
heart.psi.100 <- round(heart.psi.100,0)
# dim(heart.psi.100)
design <- model.matrix(~heart$Group)
y = DGEList(counts=heart.psi.100, group = heart$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:7)
tab <- as.data.frame(topTags(fit, n=300))
# translate <- c()
#
# for(splice in splicing){
# tr <- as.data.frame(fread(paste("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/", splice, ".hg38lnc.gff3.expanded_attr.csv",sep=""),header=T,stringsAsFactors = F))
# tr <- tr[which(tr$TranscriptType=="gene"),]
# tr <- tr[,c("TranscriptType","GeneSymbol","ID")]
#
# translate <- rbind(translate,tr)
# }
# head(translate)
# write.table(translate,file="/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/allSplice.hg38lnc.gff3.truncated.tsv",sep="\t",quote=F,row.names = F)
translate <- as.data.frame(fread("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/allSplice.hg38lnc.gff3.truncated.tsv"))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})
# write.table(tab,file="/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Heart_DEisoforms.tsv",sep="\t",col.names = NA)
# kable(head(tab)) %>% kable_styling()
top5 <- c("chr1:93339357:93339536:-@chr1:93337377:93339274:-@chr1:93311490:93311608:-@chr1:93305388:93305470:-",
"chr1:93339357:93339536:-@chr1:93338230:93339274:-@chr1:93311490:93311608:-@chr1:93305388:93305470:-",
"chr1:93339357:93339536:-@chr1:93338230:93339274:-@chr1:93305388:93305470:-",
"chr15:41892777:41893078:+@chr15:41893756|41895584:41897521:+",
"chr1:93339357:93339536:-@chr1:93337377:93339274:-@chr1:93305388:93305470:-")
top5 <- as.data.frame(heart.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
heart$Group <- factor(heart$Group, levels=c("VeryEarly_Embryo",
"Early_Embryo",
"Developing_Embryo",
"Midstage_Embryo",
"Late_Embryo",
"VeryLate_Embryo",
"Fetus",
"neonate",
"infant",
"toddler",
# "school age child",
"adolescent",
"young adult",
"middle adult"))
# "elderly"))
top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,heart,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") +
facet_wrap(~Row.names) + theme_bw() +
theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
strip.text.x = element_text(color="black",face = "bold",size=7))
heart <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="heart")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")])
heart$Group[which(heart$DevelopStage == "19 week post conception")] <- "Fetus"
heart$Group[which(heart$DevelopStage == "13 week post conception" | heart$DevelopStage == "16 week post conception" | heart$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
heart$Group[which(heart$DevelopStage == "11 week post conception" | heart$DevelopStage == "12 week post conception")] <- "Late_Embryo"
heart$Group[which(heart$DevelopStage == "9 week post conception" | heart$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
heart$Group[which(heart$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
heart$Group[which(heart$DevelopStage == "7 week post conception")] <- "Early_Embryo"
heart$Group[which(heart$DevelopStage == "4 week post conception" | heart$DevelopStage == "5 week post conception" | heart$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"
heart$Group[which(heart$DevelopStage == "middle adult" | heart$DevelopStage == "young adult" | heart$DevelopStage == "adolescent")] <- "Adult"
heart$Group[which(heart$DevelopStage == "toddler")] <- "infant"
# heart <- heart[-which(heart$DevelopStage == "adolescent"),]
heart.samps <- heart$Samples
table(heart$Group)
##
## Adult Developing_Embryo Early_Embryo Fetus
## 3 5 3 3
## infant Late_Embryo Midstage_Embryo neonate
## 6 6 7 3
## VeryEarly_Embryo VeryLate_Embryo
## 6 8
heart.psi <- all_psi[,heart.samps]
heart.psi.complete <- heart.psi[complete.cases(heart.psi),]
heart.psi.complete <- heart.psi.complete[,order(colnames(heart.psi.complete))]
heart.psi.100 <- heart.psi.complete * 100 # turn psi values into integers
heart.psi.100 <- round(heart.psi.100,0)
design <- model.matrix(~heart$Group)
y = DGEList(counts=heart.psi.100, group = heart$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:7)
tab <- as.data.frame(topTags(fit, n=300))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})
# write.table(tab,file="/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Heart_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
| logFC.heart.GroupDeveloping_Embryo | logFC.heart.GroupEarly_Embryo | logFC.heart.GroupFetus | logFC.heart.Groupinfant | logFC.heart.GroupLate_Embryo | logFC.heart.GroupMidstage_Embryo | logCPM | F | PValue | FDR | Gene | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| chr15:41892777:41893078:+@chr15:41893756|41895584:41897521:+ | -4.389741 | -5.046646 | -2.354436 | -0.0135916 | -3.383262 | -3.582378 | 8.023539 | 33.71331 | 0 | 0 | AC020659.1 |
| chr15:41892777:41893078:+@chr15:41893756|41895580:41897521:+ | -4.246148 | -4.968700 | -2.303531 | -0.0254198 | -3.306240 | -3.560064 | 7.961190 | 31.76561 | 0 | 0 | AC020659.1 |
| chr15:41892777:41893078:+@chr15:41893742|41895584:41897521:+ | -4.318738 | -5.042154 | -2.295809 | -0.0165543 | -3.379134 | -3.523287 | 8.024377 | 31.20384 | 0 | 0 | AC020659.1 |
| chr15:41892777:41893078:+@chr15:41893742|41895580:41897521:+ | -4.251849 | -4.812156 | -2.255668 | -0.0369236 | -3.366362 | -3.484122 | 7.965296 | 29.89463 | 0 | 0 | AC020659.1 |
| chr15:41892777:41893078:+@chr15:41893742|41895523:41897521:+ | -4.167608 | -4.730326 | -2.173184 | 0.0837477 | -3.403682 | -3.402290 | 7.967010 | 27.61017 | 0 | 0 | AC020659.1 |
| chr15:41892777:41893078:+@chr15:41893756|41895523:41897521:+ | -4.155502 | -4.571593 | -2.215309 | 0.0989547 | -3.332308 | -3.390214 | 7.957654 | 27.46999 | 0 | 0 | AC020659.1 |
top5 <- c("chr15:41892777:41893078:+@chr15:41893756|41895584:41897521:+",
"chr15:41892777:41893078:+@chr15:41893756|41895580:41897521:+",
"chr15:41892777:41893078:+@chr15:41893742|41895584:41897521:+",
"chr15:41892777:41893078:+@chr15:41893742|41895580:41897521:+",
"chr15:41892777:41893078:+@chr15:41893742|41895523:41897521:+")
top5 <- as.data.frame(heart.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
heart$Group <- factor(heart$Group, levels=c("VeryEarly_Embryo",
"Early_Embryo",
"Developing_Embryo",
"Midstage_Embryo",
"Late_Embryo",
"VeryLate_Embryo",
"Fetus",
"neonate",
"infant",
"toddler",
"Adult"))
top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,heart,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") +
facet_wrap(~Row.names) + theme_bw() +
theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
strip.text.x = element_text(color="black",face = "bold",size=7))
new <- heart.psi.complete[rownames(tab)[which(tab$FDR<0.05)],]
rownames(new) <- sapply(rownames(new),function(name){paste(translate$GeneSymbol[which(translate$ID==name)],name,sep="_")})
new$transcripts <- rownames(new)
new.melt <- melt(new)
new.melt <- merge(new.melt,heart,by.x=2,by.y=1)
# head(new.melt)
transcripts <- unique(new.melt$transcripts)
pdf("/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Heart_DEisoforms_Boxplots.pdf",width = 9,height = 7)
for (trans in transcripts){
df <- new.melt[which(new.melt$transcripts==trans),]
print(ggplot(df,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") + theme_bw() +
ggtitle(paste(str_wrap(trans, width = 75))) + xlab("") +
theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
plot.title = element_text(hjust=0.5,size = 12), legend.position = "none") )
}
dev.off()
heart <- heart[order(heart$Group),]
ra <- HeatmapAnnotation(Group = heart$Group,col = list(Group=c("VeryEarly_Embryo"= "purple",
"Adult" = "coral3",
"Midstage_Embryo"="blue",
"Early_Embryo" = "yellow",
"Late_Embryo" = "brown",
"VeryLate_Embryo"= "pink",
# "Adolescent"= "cyan3",
"Developing_Embryo"="deeppink1",
"Fetus" = "goldenrod",
"neonate" = "cyan3",
"infant" = "green")))
# "toddler" = "midnightblue")))
#ha <- HeatmapAnnotation(SpliceType = splice_type, col=list(SpliceType=c("SE"="red","MXE"="blue","RI"="cyan3","A3SS"="yellow","A5SS"="orange")))
lt05 <- rownames(tab)[which(tab$FDR < 0.05)]
toplot <- heart.psi.complete[lt05,]
toplot <- medianCtr(toplot)
toplot <- toplot[,heart$Samples]
draw(Heatmap(as.matrix(toplot), cluster_columns = F, name = "PSI",column_title = "Top DE Transcripts (FDR < 0.05)\nhuman Heart Samples", clustering_distance_columns = "pearson",clustering_method_columns = "average", bottom_annotation = ra, row_names_max_width = max_text_width(rownames(toplot), gp = gpar(fontsize = 3))),heatmap_legend_side = "left", annotation_legend_side = "left")
tog <- t(heart.psi.complete)
tog <- tog[order(rownames(tog)),]
heart <- heart[order(heart$Samples),]
# identical(heart$Samples,rownames(tog))
anova.res <- as.data.frame(apply(tog,2,function(col){summary(aov(col~heart$Group))[[1]][["Pr(>F)"]][1]}))
colnames(anova.res) <- "Pvalue"
anova.res$FDR <- p.adjust(anova.res$Pvalue,method = "fdr")
head(anova.res)
## Pvalue
## chr10:100231015:100231126:+@chr10:100231246:100231660:+@chr10:100233231:100233443:+ 0.0005203386
## chr10:100373544:100374012:+@chr10:100380895:100381109:+@chr10:100381216:100381448:+ 0.5363823723
## chr10:100373544:100374012:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.6594444183
## chr10:100373568:100374080:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.4545368180
## chr10:102450187:102450330:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.9751767854
## chr10:102450187:102450360:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.9376430047
## FDR
## chr10:100231015:100231126:+@chr10:100231246:100231660:+@chr10:100233231:100233443:+ 0.00790543
## chr10:100373544:100374012:+@chr10:100380895:100381109:+@chr10:100381216:100381448:+ 0.68021411
## chr10:100373544:100374012:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.77250427
## chr10:100373568:100374080:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.62031948
## chr10:102450187:102450330:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.97793542
## chr10:102450187:102450360:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.95151082
anova.sig <- anova.res[which(anova.res$FDR<0.05),]
cat("Number of significant transcripts by anova test: ",nrow(anova.sig))
## Number of significant transcripts by anova test: 320
tab.sig <- tab[which(tab$FDR<0.05),]
cat("\nNumber of significant transcripts by edgeR quasi-likelihood test: ",nrow(tab.sig))
##
## Number of significant transcripts by edgeR quasi-likelihood test: 210
sig.trans <- rownames(anova.sig)[rownames(anova.sig) %in% rownames(tab.sig)]
cat("\nNumber of transcripts shared between anova and edgeR: ",length(sig.trans))
##
## Number of transcripts shared between anova and edgeR: 174
grid.newpage()
venn <- draw.pairwise.venn(area1= nrow(anova.sig),
area2= nrow(tab.sig),
cross.area = length(sig.trans),
category = c("Anova", "EdgeR"))
grid.draw(venn)
# splicing <- c("SE","MXE","RI","A3SS","A5SS")
# splice_type <- c()
# human <- read.delim("~/Desktop/MISO_proj/EuropeanDataset/data/Sample_Metadata_human_Heart-Brain-Only.tsv",sep="\t",header = T,stringsAsFactors = F)
# human <- human[order(human$Source.Name),]
fore.samps <- human$Source.Name[which(human$Characteristics.organism.part.=="forebrain")]
# Combine the matrices into one matrix
# for (splice in splicing){
# x <- read.csv(paste("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/EuropeanDataset/human/cohort_level/miso_counts_",splice,"_mat.csv",sep=""),header=T,row.names = 1,check.names = F)
# colnames(x) <- sub(".counts.txt","",colnames(x))
# x <- x[,order(colnames(x))]
# y <- apply(x,1,function(row){table(is.na(row))["FALSE"] >= half})
# x <- x[y==T,]
# splice_type <- c(splice_type,rep(splice,nrow(x)))
# all_psi <- rbind(all_psi,data.frame(x,SpliceType=splice))
#
# }
fbrain <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="forebrain")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")])
fbrain$Group[which(fbrain$DevelopStage == "19 week post conception")] <- "Fetus"
fbrain$Group[which(fbrain$DevelopStage == "13 week post conception" | fbrain$DevelopStage == "16 week post conception" | fbrain$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "11 week post conception" | fbrain$DevelopStage == "12 week post conception")] <- "Late_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "9 week post conception" | fbrain$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "7 week post conception")] <- "Early_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "4 week post conception" | fbrain$DevelopStage == "5 week post conception" | fbrain$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"
# fbrain$Group[which(fbrain$DevelopStage == "middle adult" | fbrain$DevelopStage == "young adult")] <- "Adult"
# fbrain <- fbrain[-which(fbrain$DevelopStage == "adolescent"),]
# table(fbrain$DevelopStage)
table(fbrain$Group)
##
## adolescent Developing_Embryo Early_Embryo elderly
## 4 4 3 2
## Fetus infant Late_Embryo middle adult
## 5 2 5 2
## Midstage_Embryo neonate school age child toddler
## 4 3 2 3
## VeryEarly_Embryo VeryLate_Embryo young adult
## 5 6 5
fbrain.psi <- all_psi[,fore.samps]
fbrain.psi.complete <- fbrain.psi[complete.cases(fbrain.psi),]
fbrain.psi.complete <- fbrain.psi.complete[,order(colnames(fbrain.psi.complete))]
fbrain.psi.100 <- fbrain.psi.complete * 100 # turn psi values into integers
fbrain.psi.100 <- round(fbrain.psi.100,0)
design <- model.matrix(~fbrain$Group)
y = DGEList(counts=fbrain.psi.100, group = fbrain$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:6)
tab <- as.data.frame(topTags(fit, n=450))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})
# write.table(tab, file = "/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Forebrain_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
| logFC.fbrain.GroupDeveloping_Embryo | logFC.fbrain.GroupEarly_Embryo | logFC.fbrain.Groupelderly | logFC.fbrain.GroupFetus | logFC.fbrain.Groupinfant | logCPM | F | PValue | FDR | Gene | |
|---|---|---|---|---|---|---|---|---|---|---|
| chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+ | 2.913442 | 2.646533 | -0.1412662 | 1.922600 | -0.6306499 | 7.842873 | 24.30047 | 0 | 0.0e+00 | TMEM161B-AS1 |
| chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+ | 3.651425 | 3.581139 | -0.4012360 | 3.600045 | 0.8077651 | 7.518189 | 21.29206 | 0 | 1.0e-07 | AC093155.3 |
| chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+ | -1.710955 | -2.843688 | -0.9209217 | -0.420355 | -0.3024015 | 8.628008 | 19.23880 | 0 | 2.0e-07 | SILC1 |
| chr14:100834632-100834751:+@chr14:100835389-100835549:+ | 1.913827 | 1.961178 | 0.5739953 | 1.544119 | 0.1014239 | 8.838687 | 17.90957 | 0 | 4.0e-07 | MEG3 |
| chr14:100834632-100834751:+@chr14:100835370-100835549:+ | 1.792982 | 1.855878 | 0.5318384 | 1.368526 | 0.1323979 | 8.847548 | 16.83574 | 0 | 8.0e-07 | MEG3 |
| chr14:100834632-100834751:+@chr14:100835293-100835549:+ | 1.466886 | 1.495520 | 0.2660893 | 1.245227 | 0.1109822 | 8.936573 | 15.67299 | 0 | 1.9e-06 | MEG3 |
# translate <- as.data.frame(fread("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/allSplice.mm10lnc.gff3.truncated.tsv"))
top5 <- c("chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+",
"chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+",
"chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+",
"chr14:100834632-100834751:+@chr14:100835389-100835549:+",
"chr14:100834632-100834751:+@chr14:100835370-100835549:+")
top5 <- as.data.frame(fbrain.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
fbrain$Group <- factor(fbrain$Group, levels=c("VeryEarly_Embryo",
"Early_Embryo",
"Developing_Embryo",
"Midstage_Embryo",
"Late_Embryo",
"VeryLate_Embryo",
"Fetus",
"neonate",
"infant",
"toddler",
"school age child",
"adolescent",
"young adult",
"middle adult",
"elderly"))
top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,fbrain,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") +
facet_wrap(~Row.names) + theme_bw() +
theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
strip.text.x = element_text(color="black",face = "bold",size=7))
fbrain <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="forebrain")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")])
fbrain$Group[which(fbrain$DevelopStage == "19 week post conception")] <- "Fetus"
fbrain$Group[which(fbrain$DevelopStage == "13 week post conception" | fbrain$DevelopStage == "16 week post conception" | fbrain$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "11 week post conception" | fbrain$DevelopStage == "12 week post conception")] <- "Late_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "9 week post conception" | fbrain$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "7 week post conception")] <- "Early_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "4 week post conception" | fbrain$DevelopStage == "5 week post conception" | fbrain$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "infant")] <- "toddler"
fbrain$Group[which(fbrain$DevelopStage == "school age child")] <- "adolescent"
fbrain$Group[which(fbrain$DevelopStage == "middle adult" | fbrain$DevelopStage == "young adult")] <- "Adult"
# fbrain <- fbrain[-which(fbrain$DevelopStage == "adolescent"),]
# table(fbrain$DevelopStage)
table(fbrain$Group)
##
## adolescent Adult Developing_Embryo Early_Embryo
## 6 7 4 3
## elderly Fetus Late_Embryo Midstage_Embryo
## 2 5 5 4
## neonate toddler VeryEarly_Embryo VeryLate_Embryo
## 3 5 5 6
fbrain.psi <- all_psi[,fore.samps]
fbrain.psi.complete <- fbrain.psi[complete.cases(fbrain.psi),]
fbrain.psi.complete <- fbrain.psi.complete[,order(colnames(fbrain.psi.complete))]
fbrain.psi.100 <- fbrain.psi.complete * 100 # turn psi values into integers
fbrain.psi.100 <- round(fbrain.psi.100,0)
design <- model.matrix(~fbrain$Group)
y = DGEList(counts=fbrain.psi.100, group = fbrain$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:6)
tab <- as.data.frame(topTags(fit, n=450))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})
# write.table(tab, file = "/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Forebrain_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
| logFC.fbrain.GroupAdult | logFC.fbrain.GroupDeveloping_Embryo | logFC.fbrain.GroupEarly_Embryo | logFC.fbrain.Groupelderly | logFC.fbrain.GroupFetus | logCPM | F | PValue | FDR | Gene | |
|---|---|---|---|---|---|---|---|---|---|---|
| chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+ | -1.3781899 | 2.948105 | 2.877820 | -1.1045623 | 2.8967229 | 7.518165 | 34.84402 | 0 | 0 | AC093155.3 |
| chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+ | -0.1205005 | 2.744842 | 2.477933 | -0.3098665 | 1.7539938 | 7.842851 | 31.65448 | 0 | 0 | TMEM161B-AS1 |
| chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+ | -0.1570974 | -1.721987 | -2.854720 | -0.9319539 | -0.4313871 | 8.628018 | 23.45436 | 0 | 0 | SILC1 |
| chr6:22056546:22056690:+@chr6:22110747:22110920:+@chr6:22191567:22191656:+@chr6:22194045:22194224:+ | -0.6514583 | 2.065047 | 2.256419 | 0.7559075 | 2.4552011 | 7.589467 | 22.30854 | 0 | 0 | CASC15 |
| chr14:100834632-100834751:+@chr14:100835389-100835549:+ | 0.2538369 | 1.778325 | 1.825676 | 0.4384934 | 1.4086168 | 8.838675 | 21.97420 | 0 | 0 | MEG3 |
| chr17:16439037:16439060:+@chr17:16439327:16439393:+@chr17:16439660:16439703:+@chr17:16440185:16440253:+ | -0.6657910 | -2.745684 | -2.848308 | -2.1475977 | -3.5102037 | 5.590671 | 21.46583 | 0 | 0 | SNHG29 |
top5 <- c("chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+",
"chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+",
"chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+",
"chr6:22056546:22056690:+@chr6:22110747:22110920:+@chr6:22191567:22191656:+@chr6:22194045:22194224:+",
"chr14:100834632-100834751:+@chr14:100835370-100835549:+")
top5 <- as.data.frame(fbrain.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
table(fbrain$Group)
##
## adolescent Adult Developing_Embryo Early_Embryo
## 6 7 4 3
## elderly Fetus Late_Embryo Midstage_Embryo
## 2 5 5 4
## neonate toddler VeryEarly_Embryo VeryLate_Embryo
## 3 5 5 6
fbrain$Group <- factor(fbrain$Group, levels=c("VeryEarly_Embryo",
"Early_Embryo",
"Developing_Embryo",
"Midstage_Embryo",
"Late_Embryo",
"VeryLate_Embryo",
"Fetus",
"neonate",
# "infant",
"toddler",
# "school age child",
"adolescent",
# "young adult",
# "middle adult",
"Adult",
"elderly"))
top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,fbrain,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") +
facet_wrap(~Row.names) + theme_bw() +
theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
strip.text.x = element_text(color="black",face = "bold",size=7))
fbrain <- fbrain[order(fbrain$Group),]
ra <- HeatmapAnnotation(Group = fbrain$Group,col = list(Group=c("VeryEarly_Embryo"= "purple",
"Adult" = "coral3",
"Midstage_Embryo"="blue",
"Early_Embryo" = "yellow",
"Late_Embryo" = "brown",
"VeryLate_Embryo"="pink",
"adolescent"= "cyan3",
"Developing_Embryo"="deeppink1",
"Fetus" = "goldenrod",
"neonate" = "skyblue",
# "infant" = "green",
"toddler" = "midnightblue",
"elderly" = "magenta")))
#ha <- HeatmapAnnotation(SpliceType = splice_type, col=list(SpliceType=c("SE"="red","MXE"="blue","RI"="cyan3","A3SS"="yellow","A5SS"="orange")))
lt05 <- rownames(tab)[which(tab$FDR < 0.05)]
toplot <- fbrain.psi.complete[lt05,]
toplot <- medianCtr(toplot) #log2(toplot+1))
toplot <- toplot[,fbrain$Samples]
draw(Heatmap(as.matrix(toplot), cluster_columns = F, name = "PSI",column_title = "Top DE Transcripts (FDR < 0.05)\nhuman Forebrain Samples", clustering_distance_columns = "pearson",clustering_method_columns = "average", bottom_annotation = ra, row_names_max_width = max_text_width(rownames(toplot), gp = gpar(fontsize = 3))),heatmap_legend_side = "left", annotation_legend_side = "left")
tog <- t(fbrain.psi.complete)
tog <- tog[order(rownames(tog)),]
fbrain <- fbrain[order(fbrain$Samples),]
identical(fbrain$Samples,rownames(tog))
## [1] TRUE
anova.res <- as.data.frame(apply(tog,2,function(col){summary(aov(col~fbrain$Group))[[1]][["Pr(>F)"]][1]}))
colnames(anova.res) <- "Pvalue"
anova.res$FDR <- p.adjust(anova.res$Pvalue,method = "fdr")
# head(anova.res)
anova.sig <- anova.res[which(anova.res$FDR<0.05),]
cat("Number of significant transcripts by anova test: ",nrow(anova.sig))
## Number of significant transcripts by anova test: 1000
tab.sig <- tab[which(tab$FDR<0.05),]
cat("\nNumber of significant transcripts by edgeR quasi-likelihood test: ",nrow(tab.sig))
##
## Number of significant transcripts by edgeR quasi-likelihood test: 450
sig.trans <- rownames(anova.sig)[rownames(anova.sig) %in% rownames(tab.sig)]
cat("\nNumber of transcripts shared between anova and edgeR: ",length(sig.trans))
##
## Number of transcripts shared between anova and edgeR: 403
grid.newpage()
venn <- draw.pairwise.venn(area1= nrow(anova.sig),
area2= nrow(tab.sig),
cross.area = length(sig.trans),
category = c("Anova", "EdgeR"))
grid.draw(venn)
# splicing <- c("SE","MXE","RI","A3SS","A5SS")
hind.samps <- human$Source.Name[which(human$Characteristics.organism.part.=="hindbrain")]
hbrain <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="hindbrain")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="hindbrain")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="hindbrain")])
# hbrain$Group[which(hbrain$DevelopStage == "19 week post conception")] <- "Fetus"
hbrain$Group[which(hbrain$DevelopStage == "13 week post conception" | hbrain$DevelopStage == "16 week post conception" | hbrain$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "11 week post conception" | hbrain$DevelopStage == "12 week post conception")] <- "Late_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "9 week post conception" | hbrain$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "7 week post conception")] <- "Early_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "4 week post conception" | hbrain$DevelopStage == "5 week post conception" | hbrain$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"
# hbrain$Group[which(hbrain$DevelopStage == "middle adult" | hbrain$DevelopStage == "young adult")] <- "Adult"
# hbrain <- hbrain[-which(hbrain$DevelopStage == "adolescent"),]
# table(hbrain$DevelopStage)
table(hbrain$Group)
##
## adolescent Developing_Embryo Early_Embryo elderly
## 4 4 3 2
## infant Late_Embryo middle adult Midstage_Embryo
## 3 7 2 4
## neonate school age child toddler VeryEarly_Embryo
## 5 3 2 9
## VeryLate_Embryo young adult
## 6 5
# colnames(all_psi) <- sub("^X","",colnames(all_psi))
hbrain.psi <- all_psi[,hind.samps]
hbrain.psi.complete <- hbrain.psi[complete.cases(hbrain.psi),]
hbrain.psi.complete <- hbrain.psi.complete[,order(colnames(hbrain.psi.complete))]
hbrain.psi.100 <- hbrain.psi.complete * 100 # turn psi values into integers
hbrain.psi.100 <- round(hbrain.psi.100,0)
design <- model.matrix(~hbrain$Group)
y = DGEList(counts=hbrain.psi.100, group = hbrain$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:6)
tab <- as.data.frame(topTags(fit, n=500))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})
# write.table(tab, file = "/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Hindbrain_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
| logFC.hbrain.GroupDeveloping_Embryo | logFC.hbrain.GroupEarly_Embryo | logFC.hbrain.Groupelderly | logFC.hbrain.Groupinfant | logFC.hbrain.GroupLate_Embryo | logCPM | F | PValue | FDR | Gene | |
|---|---|---|---|---|---|---|---|---|---|---|
| chr19:27793000:27793232:@chr19:27778711:27779043:@chr19:27773766:27774252:- | -1.485768 | -2.661664 | 0.9872802 | 0.9549143 | -1.9088499 | 7.017195 | 17.99017 | 0 | 3e-07 | LINC00662 |
| chr2:5968586:5969238:+@chr2:5982298|5982773:5982857:+ | 2.559598 | 2.643883 | 0.2950541 | 1.4673478 | 2.2797178 | 8.318076 | 17.92197 | 0 | 3e-07 | SILC1 |
| chr6:30296132:30296237:@chr6:30289494|30292599:30286703:- | 1.162813 | 1.072769 | 0.3665222 | 0.3020122 | 0.9891289 | 8.680593 | 17.88935 | 0 | 3e-07 | HCG18%2CHCG17 |
| chr11:122292284:122292403:@chr11:122180336:122180398:@chr11:122155551:122155632:@chr11:122089196:122091719:- | -2.389528 | -2.408056 | 0.4106787 | 0.5236948 | -2.3381540 | 6.969478 | 17.51449 | 0 | 3e-07 | MIR100HG |
| chr19:27793000:27793232:@chr19:27778711:27779043:@chr19:27774161:27774792:- | -1.385682 | -2.402827 | 0.8870307 | 0.6670415 | -1.9397886 | 7.141349 | 17.43440 | 0 | 3e-07 | LINC00662 |
| chr14:100834632-100834751:+@chr14:100835293-100835549:+ | 1.113765 | 1.270986 | 0.0202678 | -0.2823901 | 1.1398182 | 8.591863 | 17.05770 | 0 | 3e-07 | MEG3 |
top5 <- c("chr19:27793000:27793232:-@chr19:27778711:27779043:-@chr19:27773766:27774252:-",
"chr2:5968586:5969238:+@chr2:5982298|5982773:5982857:+",
"chr6:30296132:30296237:-@chr6:30289494|30292599:30286703:-",
"chr11:122292284:122292403:-@chr11:122180336:122180398:-@chr11:122155551:122155632:-@chr11:122089196:122091719:-",
"chr19:27793000:27793232:-@chr19:27778711:27779043:-@chr19:27774161:27774792:-")
top5 <- as.data.frame(hbrain.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
hbrain$Group <- factor(hbrain$Group, levels=c("VeryEarly_Embryo",
"Early_Embryo",
"Developing_Embryo",
"Midstage_Embryo",
"Late_Embryo",
"VeryLate_Embryo",
# "Fetus",
"neonate",
"infant",
"toddler",
"school age child",
"adolescent",
"young adult",
"middle adult",
# "Adult",
"elderly"))
top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,hbrain,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") +
facet_wrap(~Row.names) + theme_bw() +
theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
strip.text.x = element_text(color="black",face = "bold",size=7))
new <- hbrain.psi.complete[rownames(tab)[which(tab$FDR<0.05)],]
rownames(new) <- sapply(rownames(new),function(name){paste(translate$GeneSymbol[which(translate$ID==name)],name,sep="_")})
new$transcripts <- rownames(new)
new.melt <- melt(new)
new.melt <- merge(new.melt,hbrain,by.x=2,by.y=1)
# head(new.melt)
hbrain <- hbrain[order(hbrain$Group),]
ra <- HeatmapAnnotation(Group = hbrain$Group,col = list(Group=c("VeryEarly_Embryo"= "purple",
"middle adult" = "coral3",
"Midstage_Embryo"="blue",
"Early_Embryo" = "yellow",
"Late_Embryo" = "brown",
"VeryLate_Embryo"="pink",
"adolescent"= "cyan3",
"Developing_Embryo"="deeppink1",
"Fetus" = "goldenrod",
"neonate" = "skyblue",
"infant" = "green",
"school age child" = "grey60",
"toddler" = "midnightblue",
"young adult" = "black",
"elderly" = "magenta")))
#ha <- HeatmapAnnotation(SpliceType = splice_type, col=list(SpliceType=c("SE"="red","MXE"="blue","RI"="cyan3","A3SS"="yellow","A5SS"="orange")))
lt05 <- rownames(tab)[which(tab$FDR < 0.05)]
toplot <- hbrain.psi.complete[lt05,]
toplot <- medianCtr(toplot) #log2(toplot+1))
draw(Heatmap(as.matrix(toplot), cluster_columns = F, name = "PSI",column_title = "Top DE Transcripts (FDR < 0.05)\nhuman Hindbrain Samples", clustering_distance_columns = "pearson",clustering_method_columns = "average", bottom_annotation = ra, row_names_max_width = max_text_width(rownames(toplot), gp = gpar(fontsize = 3))),heatmap_legend_side = "left", annotation_legend_side = "left")
tog <- t(hbrain.psi.complete)
tog <- tog[order(rownames(tog)),]
hbrain <- hbrain[order(hbrain$Samples),]
# identical(hbrain$Samples,rownames(tog))
anova.res <- as.data.frame(apply(tog,2,function(col){summary(aov(col~hbrain$Group))[[1]][["Pr(>F)"]][1]}))
colnames(anova.res) <- "Pvalue"
anova.res$FDR <- p.adjust(anova.res$Pvalue,method = "fdr")
# head(anova.res)
anova.sig <- anova.res[which(anova.res$FDR<0.05),]
cat("Number of significant transcripts by anova test: ",nrow(anova.sig))
## Number of significant transcripts by anova test: 851
tab.sig <- tab[which(tab$FDR<0.05),]
cat("\nNumber of significant transcripts by edgeR quasi-likelihood test: ",nrow(tab.sig))
##
## Number of significant transcripts by edgeR quasi-likelihood test: 354
sig.trans <- rownames(anova.sig)[rownames(anova.sig) %in% rownames(tab.sig)]
cat("\nNumber of transcripts shared between anova and edgeR: ",length(sig.trans))
##
## Number of transcripts shared between anova and edgeR: 315
grid.newpage()
venn <- draw.pairwise.venn(area1= nrow(anova.sig),
area2= nrow(tab.sig),
cross.area = length(sig.trans),
category = c("Anova", "EdgeR"))
grid.draw(venn)